
# Follow up - effect on discussion ---------------------------------------------------------------


fu_choices %>% count_prop(pair_includes_trans, follow_up_choose_comparator, follow_up_choose_trans)

fu_choices %>% count_prop(pair_includes_trans, item_diff)

fu_choices %>% mutate(
  pair_includes_trans = factor(pair_includes_trans)
) %>%
  filter(phase == "phase_2") %>%
  bar_chart(x = discuss_type, y = follow_up_choose_comparator, fill = pair_includes_trans)

fu_choices %>% mutate(
  pair_includes_trans = factor(pair_includes_trans)
) %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  bar_chart(x = discuss_type, y = follow_up_choose_comparator, fill = pair_includes_trans)

fu_choices %>% count_prop(pair_includes_trans_alt, video_type_placebo,
                          discuss_type, discussion_full)

fu_choices %>% count_nas()

models_follow_up <- list(
  "Chose worker in\nfollow-up round (=1)" = feols_custom(
    follow_up_choose_comparator ~ pair_includes_trans * (discussion_full),
    data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(follow_up_choose_comparator)),
    fixef = c(NULL),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
    coef_omit = "group_control|stratum_id|delivery_incentive_exp"
  ),
  "Chose worker in\nfollow-up round (=1)" = feols_custom(
    follow_up_choose_comparator ~ pair_includes_trans_alt + pair_includes_trans_alt * (video_type_placebo + video_type_treatment + discussion_full + factor(stratum_id) + delivery_incentive_exp),
    data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(follow_up_choose_comparator)),
    fixef = c("stratum_id", "delivery_incentive_exp"),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
    coef_omit = "group_control|stratum_id|delivery_incentive_exp"
  ),
  "Chose trans in\nfollow-up round (=1)\n(pairs with trans only)" = feols_custom(
    follow_up_choose_trans ~ video_type_placebo + video_type_treatment + discussion_full + factor(stratum_id) + delivery_incentive_exp,
    data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(follow_up_choose_trans)),
    fixef = c("stratum_id", "delivery_incentive_exp"),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
    coef_omit = "group_control|stratum_id|delivery_incentive_exp"
  )
)

tex_export(
  models_follow_up,
  file = "outputs/tables/follow_up.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "delivery", "pair_includes_trans_alt$", "video")),
  dep_means = list(
    "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
    "Mean: No discussion (private) + worker is T" = "discuss_type == 'control' & pair_includes_trans == TRUE"
  ),
  add_rows = list_to_add_rows(control_rows)
)


((models_follow_up[[1]] %>% get_coeff("pair_includes_trans")) * -100) %>%
  write_stat("outputs/stats/trans_penalty_follow_up.tex", digits = 1)


models_follow_up[[1]] %>% get_p_val("pair_includes_trans") %>% write_stat(
  "outputs/stats/p_val_trans_penalty_follow_up.tex",
  digits = 2, p_value = TRUE
)

models_follow_up[[3]] %>% get_p_val("discussion_full") %>% write_stat(
  "outputs/stats/p_val_discussion_follow_up.tex",
  digits = 2, p_value = TRUE
)

models_follow_up[[3]] %>%
  get_coeff("discussion_full") %>%
  times_100 %>%
  write_stat("outputs/stats/effect_discussion_follow_up.tex", digits = 0)


# Average length of time between outcome and follow-up
follow_up_lag <- df_follow_up %>%
  select(matches("date")) %>%
  count_prop(first_survey_date) %>%
  tidylog::filter(year(survey_date) == 2023) %>%
  mutate(first_survey_date = coalesce(ymd(first_survey_date), dmy(first_survey_date))) %>%
  mutate(diff_date = interval(first_survey_date, survey_date) %/% days(1)) %>%
  count_prop(diff_date)


follow_up_lag %>% hist_basic(diff_date)

# Mean follow-up lag:
follow_up_lag %>%
  get_mean(diff_date) %>%
  write_stat("outputs/stats/follow_up_lag_mean.tex", digits = 0)


follow_up_lag %>%
  get_mean(diff_date) %>%
  write_stat("outputs/stats/follow_up_lag_mean_round.tex", digits = 0)

# Get SD
follow_up_lag %>%
  pull(diff_date) %>%
  sd(na.rm = TRUE) %>%
  write_stat("outputs/stats/follow_up_lag_sd.tex", digits = 0)


follow_up_lag %>% hist_basic(diff_date)

follow_up_lag %>% filter(diff_date <0) %>%
  glimpse
# hist_basic(diff_date)








# ATTRITION RATES

attrition <- df %>%
  # filter(phase == "phase_1") %>%
  tidylog::left_join(df_follow_up %>% mutate(follow_up_yn = TRUE),
                     by = "ind_id", suffix = c("", "_fu")) %>%
  mutate(
    follow_up_yn = ifelse(is.na(follow_up_yn), FALSE, follow_up_yn)
  )

attrition %>%
  # filter(phase == "phase_1") %>%
  get_mean(follow_up_yn) %>%
  write_percentage("outputs/stats/attrition_perc.tex", digits = 1)

differential_attrition <- list(
  "3-person discussion sample\n(Phase 1 + 2)" = feols_custom(
    follow_up_yn ~ discussion_full,
    data = attrition %>% filter(discuss_type %in% c("control", "discussion_full")),
    cluster = "group_id"
  ),
  "Rights videos (all participants)" = feols_custom(
    follow_up_yn ~ video_type_placebo + video_type_treatment,
    data = attrition %>%
      mutate(control_video_alt = video_type == "control") %>%
      select(-discuss_type),
    cluster = "group_id"
  )
)

tex_export(
  differential_attrition,
  file = "outputs/tables/differential_attrition.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "delivery", "pair_includes_trans_alt$")),
  dep_means = list(
    "Mean: No discussion (private)" = "discuss_type == 'control'",
    "Mean: Control video" = "control_video_alt == TRUE"
  ),
  additional_header = vec_to_custom_header(c(
    " ", rep("Dep var: Follow-up survey completed (=1)", 2)
  ))
)





differential_attrition[[1]]$coefficients["discussion_full"] %>% write_stat("outputs/stats/attrition_diff.tex", digits = 3)
differential_attrition[[1]] %>% get_p_val("discussion_full") %>% write_stat("outputs/stats/attrition_diff_p.tex", digits = 2)


# Difference between follow up and baseline control ---------------------------------------------------------------

df_follow_up

r2_fu_long <- list(
  "r2" = r2_choices_num,
  "fu" = fu_choices
) %>%
  map(select, -matches("d2|group_label|hiring_refusal_reason|hiring_choice_refusal|survey_status|survey_partial|refusal|survey")) %>%
  bind_rows(.id = "survey_type") %>%
  filter(!is.na(follow_up_choose_trans) | pair_includes_trans == 1) %>%
  count_prop(follow_up_choose_trans, r2_choose_trans) %>%
  mutate(r2_choose_trans = coalesce(follow_up_choose_trans, r2_choose_trans))


diff_r2_fu <- feols_custom(
  r2_choose_trans ~ video_type_placebo + video_type_treatment + survey_type + factor(stratum_id) + delivery_incentive_exp,
  data = r2_fu_long %>% filter(discuss_type %in% c("control")),
  fixef = c("stratum_id", "delivery_incentive_exp"),
  cluster = "group_id"
)

diff_r2_fu %>% get_coeff("survey_typer2") %>% times_100 %>% {. * -1} %>% write_stat("outputs/stats/diff_r2_fu.tex", digits = 1)
diff_r2_fu %>% get_p_val("survey_typer2") %>% write_stat("outputs/stats/diff_r2_fu_p.tex", digits = 2, p_value = TRUE)

# Follow up - figure ---------------------------------------------------------------

fu_n <- fu_choices %>%
  group_by(discuss_type, discussion_full, pair_includes_trans) %>%
  summarise(n_ind = n_distinct(ind_id),
            n_obs = n(),
            n_groups = n_distinct(group_id)) %>%
  group_by(discuss_type) %>%
  mutate(n_ind = max(n_ind)) %>%
  mutate(discuss = discussion_full == 1) %>%
  ungroup


alpha <- 1

fu_choices %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(
    control_non_trans = !pair_includes_trans & discussion_full == 0,
    treat_non_trans = !pair_includes_trans & discussion_full == 1,
    control_trans = pair_includes_trans & discussion_full == 0,
    treat_trans = pair_includes_trans & discussion_full == 1,
  ) %>%
  mutate(across(c(control_non_trans, treat_non_trans, control_trans, treat_trans), as.numeric)) %>%

  feols_custom(
    follow_up_choose_comparator ~ (control_trans + treat_non_trans + treat_trans),
    data = .,
    cluster = "group_id",
    ri = FALSE
  ) %>%

  tidy_90 %>%

  filter(str_detect(term, "(control_trans|treat_non_trans|treat_trans)$")) %>%

  add_row(term = "omitted", estimate = 0, std.error = 0) %>%
  mutate(
    pair_includes_trans = str_detect(term, "control_trans|treat_trans"),
    discuss = as.numeric(str_detect(term, "treat"))
  ) %>%

  left_join(fu_n %>% filter(discuss_type %in% c("control", "discussion_full"))) %>%

  mutate(
    discuss_label = ifelse(discussion_full==1, "3-person discussion", "No discussion (private)") %>%
      fct_relevel("No discussion (private)") %>%
      fct_label_append(lab_append = paste0("\n(N=", n_ind, ")")),
    pair_includes_trans_label = ifelse(pair_includes_trans==1, "Worker\nis trans", "Worker\nis non-trans") %>% paste0(., "\n(Obs=", n_obs, ")")
  ) %>%

  ggplot(aes(x = pair_includes_trans_label, colour = factor(pair_includes_trans))) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "skyblue") +
  geom_errorbar(aes(ymin = conf.low_90, ymax = conf.high_90), alpha = alpha, position = position_dodge(0.8), show.legend = F, width = 0, size = 1.2) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), alpha = alpha, position = position_dodge(0.8), show.legend = F, width = 0, size = 0.7) +
  geom_point(aes(y = estimate), alpha = alpha, position = position_dodge(0.8), show.legend = F, size = 3) +
  theme_minimal() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major = element_blank()) +
  labs(x = element_blank(), y = "Effect on probability of selecting worker\n in follow-up round") +
  facet_wrap(~ discuss_label, scales = "free_x") +
  ylim(-0.21, 0.02)

ggsave("outputs/figs/follow_up_graph_point.pdf", width = 4, height = 4)



# Follow-up videos ---------------------------------------------------------------

models_follow_up_videos <- list(
  "Chose worker in\nfollow-up round (=1)" = feols_custom(
    follow_up_choose_comparator ~ pair_includes_trans * (video_type_placebo + video_type_treatment),
    data = fu_choices %>% filter(!is.na(follow_up_choose_comparator)),
    fixef = c(NULL),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_omit = "stratum_id|Intercept",
    control_group = list("video_type_control" = c("video_type_placebo", "video_type_treatment"))
  ),
  "Chose worker in\nfollow-up round (=1)" = feols_custom(
    follow_up_choose_comparator ~ pair_includes_trans_alt + pair_includes_trans_alt * (video_type_placebo + video_type_treatment + treat_type_r2_label + factor(stratum_id) + delivery_incentive_exp),
    data = fu_choices %>% filter(!is.na(follow_up_choose_comparator)),
    fixef = c("stratum_id", "delivery_incentive_exp"),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "pair_includes_trans_alt$|video|pair_includes_trans_alt\\:video",
    control_group = list("video_type_control" = c("video_type_placebo", "video_type_treatment"))
  ),
  "Chose trans in\nfollow-up round (=1)\n(pairs with trans only)" = feols_custom(
    follow_up_choose_trans ~ video_type_placebo + video_type_treatment + treat_type_r2_label + factor(stratum_id) + delivery_incentive_exp,
    data = fu_choices %>% filter(!is.na(follow_up_choose_trans)),
    fixef = c("stratum_id", "delivery_incentive_exp"),
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "pair_includes_trans_alt$|video|pair_includes_trans_alt\\:video",
    control_group = list("video_type_control" = c("video_type_placebo", "video_type_treatment"))
  )
)


tex_export(
  models_follow_up_videos,
  file = "outputs/tables/follow_up_video.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "delivery", "pair_includes_trans_alt$", "discussion")),
  dep_means = list(
    "Mean: Control video + worker is non-trans" = "video_type == 'control' & pair_includes_trans == FALSE",
    "Mean: Control video + worker is T" = "video_type == 'control' & pair_includes_trans == TRUE"
  ),
  add_rows = list_to_add_rows(control_rows),
)

c(
  models_follow_up_videos[[1]] %>% get_p_val("pair_includes_trans:video_type_treatment"),
  models_follow_up_videos[[1]] %>% get_p_val("pair_includes_trans:video_type_placebo"),
  models_follow_up_videos[[2]] %>% get_p_val("pair_includes_trans:video_type_treatment"),
  models_follow_up_videos[[2]] %>% get_p_val("pair_includes_trans:video_type_placebo"),
  models_follow_up_videos[[3]] %>% get_p_val("video_type_treatment"),
  models_follow_up_videos[[3]] %>% get_p_val("video_type_placebo")
) %>%
  write_range("outputs/stats/follow_up_video_pvals.tex")

get_estimates(models_follow_up_videos[[1]])


# Attrition phase 2 ---------------------------------------------------------------

attrition %>% filter(phase == "phase_2") %>% bar_chart(x = discuss_type, y = follow_up_yn, fill = discuss_type, val_label = TRUE)

attrition %>% filter(phase == "phase_2") %>% count_prop(follow_up_yn)

# 197:
attrition %>% count_prop(follow_up_yn)


attrition %>% filter(discuss_type %in% c("control", "discussion_full")) %>% bar_chart(x = discuss_type, y = follow_up_yn, fill = discuss_type, val_label = TRUE)


attrition %>% bar_chart(x = video_type, y = follow_up_yn, fill = video_type, val_label = TRUE)


# Time profile of follow up effects ---------------------------------------------------------------

fu_choices %>%
  count_prop(first_survey_date) %>%
  tidylog::filter(year(survey_date) == 2023) %>%
  mutate(first_survey_date = coalesce(ymd(first_survey_date), dmy(first_survey_date))) %>%
  mutate(diff_date = interval(first_survey_date, survey_date) %/% days(1)) %>%
  count_prop(diff_date) %>%
  mutate(
    diff_date_cat = cut_interval(diff_date, n = 4)
  ) %>%
  filter(!is.na(diff_date)) %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  bar_chart(y = follow_up_choose_trans, x = diff_date_cat, fill = discuss_type)


feols_custom(
  follow_up_choose_trans ~ video_type_placebo + video_type_treatment + discussion_full * diff_date_high + factor(stratum_id) + delivery_incentive_exp,
  data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
    filter(!is.na(follow_up_choose_trans)) %>% mutate(diff_date_high = diff_date >= median(diff_date, na.rm = TRUE), diff_date = diff_date / 30),
  fixef = c("stratum_id", "delivery_incentive_exp", "phase", "first_survey_date_week_fes"),
  cluster = "group_id",
  var_types = var_types_spec,
  coef_omit = "stratum_id|Intercept"
) %>%
  list() %>%
  tex_export()

fu_choices %>% get_mean("diff_date")


# Is phase 1 / phase 2 diffferent ---------------------------------------------------------------

feols_custom(
  follow_up_choose_trans ~ video_type_placebo + video_type_treatment + discussion_full * phase + factor(stratum_id) + delivery_incentive_exp,
  data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(follow_up_choose_trans)),
  fixef = c("stratum_id", "delivery_incentive_exp"),
  cluster = "group_id",
  var_types = var_types_spec,
  coef_omit = "stratum_id|Intercept"
) %>%
  list() %>%
  tex_export()

# Interactions ---------------------------------------------------------------

feols_custom(
  follow_up_choose_trans ~ (video_type_placebo + video_type_treatment) * discussion_full + factor(stratum_id) + delivery_incentive_exp,
  data = fu_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(follow_up_choose_trans)),
  fixef = c("stratum_id", "delivery_incentive_exp"),
  cluster = "group_id",
  var_types = var_types_spec,
  coef_omit = "stratum_id|Intercept"
) %>%
  tidy_90 %>%
  print_all


# Correlation between follow up and main survey ---------------------------------------------------------------


r2_by_ind <- r2_choices_num %>%
  filter(pair_includes_trans == 1) %>%
  group_by(ind_id) %>%
  filter(discuss_type == "control") %>%
  summarise(r2_choose_trans = mean_na(r2_choose_comparator))

fu_choices_by_ind <- fu_choices %>%
  filter(pair_includes_trans==1) %>%
  group_by(ind_id) %>%
  filter(discuss_type == "control") %>%
  summarise(follow_up_choose_trans = mean_na(follow_up_choose_comparator))

r2_fu <- tidylog::full_join(
  r2_by_ind,
  fu_choices_by_ind
)

cor_r2_fu <- cor.test(r2_fu$r2_choose_trans, r2_fu$follow_up_choose_trans)


cor_r2_fu$p.value %>% write_stat("outputs/stats/cor_r2_fu_pval.tex", p_value = TRUE)
cor_r2_fu$estimate %>% write_stat("outputs/stats/cor_r2_fu_estimate.tex")


# Comparison to R1 vs R2 correlation ----------------------------------------------------------------------------------------------------------------

r2_r1 <- tidylog::left_join(
  r2_by_ind,
  r1_choices_ind %>% select(group_id, ind_id, r1_choose_trans = p_self_selected_trans)
) %>%
  left_join(df %>% select(group_id, discuss_type)) %>%
  filter(discuss_type == "control")

cor_r2_r1 <- cor.test(r2_r1$r2_choose_trans, r2_r1$r1_choose_trans)

cor_r2_r1$p.value %>% write_stat("outputs/stats/cor_r2_r1_pval.tex", p_value = TRUE)
cor_r2_r1$estimate %>% write_stat("outputs/stats/cor_r2_r1_estimate.tex")


# Decay rate ----------------------------------------------------------------------------------------------------------------


models_follow_up[[3]]$coefficients["discussion_full"] / model_list[[1]]$coefficients["pair_includes_trans:discussion_full"]


# Phase 2 treatments - long-run effects ----------------------------------------------------------------------------------------------------------------

fu_choices %>% count_prop(phase, discuss_type, discussion_pair_listener)

models_mechs_follow_up <- list(
  "Chose trans in\nfollow-up round (=1)\n(pairs with trans only)" = feols_custom(
    follow_up_choose_trans ~ video_type_placebo + video_type_treatment + discussion_full + discussion_pair_speaker + discussion_pair_listener + public + factor(stratum_id) + delivery_incentive_exp,
    data = fu_choices %>% filter(phase == "phase_2") %>% filter(!is.na(follow_up_choose_trans)),
    fixef = c("stratum_id", "delivery_incentive_exp"),
    cluster = "group_id"
  )
)


p_vals_mechs_follow_up <- models_mechs_follow_up %>%
  purrr::map(
    get_comparison_p_vals, dummy_vars = c("control", "discussion_full", "discussion_pair_speaker", "discussion_pair_listener", "public"),
    ri = FALSE,
  ) %>%
  map(filter, row_number() %in% c(2,3,4, 7, 8, 12)) %>%
  map(~ .x %>% mutate(across(c(base, term), coef_label)))


tex_export(
  models_mechs_follow_up,
  file = "outputs/tables/mechs_follow_up.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_reorder = c(
    "discussion_full",
    "discussion_pair_speaker",
    "discussion_pair_listener",
    "public"
  ),
  stat_vec = "({std.error}) [{p.value}]",
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "phase", "pair_includes_trans_alt$", control_vars, "delivery")),
  dep_means = list(
    "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
    "Mean: No discussion (private) + worker is trans" = "discuss_type == 'control' & pair_includes_trans == TRUE"
  ),
  add_rows = p_vals_to_add_rows(p_vals_mechs_follow_up)
)

models_mechs_follow_up[[1]] %>% get_p_val("discussion_pair_listener") %>% write_stat("outputs/stats/p_val_listener_fu.tex", digits = 2, p_value = TRUE)
models_mechs_follow_up[[1]] %>% get_coeff("discussion_pair_listener") %>% times_100 %>% write_stat("outputs/stats/coeff_listener_fu.tex", digits = 0)


# Follow up group prediction ----------------------------------------------------------------------------------------------------------------

feols_custom(
  fml = group_predic_choose_trans ~ discussion_full + item_diff + reliability_diff + reliability_shown,
  fixef = c("stratum_id", "video_type", "phase"),
  cluster = "group_id",
  lasso = TRUE,
  lasso_options = list(
    potential_controls = control_vars,
    t = c("discussion_full"),
    interact = NULL,
    group_control = FALSE
  ),
  data = fu_group_predic %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
    filter(!is.na(group_predic_choose_trans))
) %>%
  list() %>%
  tex_export(dep_means = list(
    "Mean: No discussion (private)" = "discuss_type == 'control'",
    "Mean: 3-person discussion" = "discuss_type == 'discussion_full'"
  ))

fu_group_predic %>% count_prop(discussion_full)